import pyspark.sql.functions as func
from pyspark import SparkContext, SparkConf, SparkFiles
from pyspark.sql import SQLContext, Row
import ConfigParser as configparser
import os
from datetime import datetime
from vina_utils import get_directory_pdb_analysis, get_ligand_from_receptor_ligand_model
from database_io import load_database
from hydrogen_bond_io import load_file_select_hydrogen_bond, load_file_all_residue_hbonds, load_only_poses_file_hydrogen_bond, load_only_poses_file_hydrogen_bond_normalized_by_residues, load_file_summary_normalized_hbonds
from hydrogen_bond_crud import create_df_residue_list, create_df_all_residue, create_df_all_residue_filtered_by_res_list, get_group_by_poses_all_residue_filtered_by_res_list, create_df_normalized_by_donors_acceptors, create_df_normalized_by_heavy_atoms

def save_result_only_pose_normalized_by_residue_list_heavy_atoms(path_file_result_file_only_pose, df_result):
	"""Write each pose and its heavy-atom-normalized hydrogen bond value to a text file.

	The file starts with one "#" header line, followed by one line per row in
	the form "<pose>\\t<normalized_hb formatted with 4 decimals>".

	path_file_result_file_only_pose: path of the file to create or overwrite.
	df_result: object whose collect() returns rows exposing the attributes
		``pose`` and ``normalized_hb``.
	"""
	header = "# poses and normalized hydrogen bond by heavy_atoms that were filtered by residues from hydrogen bond\n"
	# The context manager closes the file even if collect() or formatting raises.
	with open(path_file_result_file_only_pose, "w") as f_file:
		f_file.write(header)
		for row in df_result.collect():
			normalized_value = "{:.4f}".format(row.normalized_hb)
			f_file.write(str(row.pose) + "\t" + normalized_value + "\n")


def save_result_only_pose_normalized_by_residue_list(path_file_result_file_only_pose, df_result):
	"""Write each pose and its normalized hydrogen bond value to a text file.

	The file starts with one "#" header line, followed by one line per row in
	the form "<pose>\\t<normalized_hb formatted with 4 decimals>".

	path_file_result_file_only_pose: path of the file to create or overwrite.
	df_result: object whose collect() returns rows exposing the attributes
		``pose`` and ``normalized_hb``.
	"""
	header = "# poses and normalized hydrogen bond that were filtered by residues from hydrogen bond\n"
	# The context manager closes the file even if collect() or formatting raises.
	with open(path_file_result_file_only_pose, "w") as f_file:
		f_file.write(header)
		for row in df_result.collect():
			normalized_value = "{:.4f}".format(row.normalized_hb)
			f_file.write(str(row.pose) + "\t" + normalized_value + "\n")


def save_result_only_pose(path_file_result_file_only_pose, df_result):
	"""Write each pose and its ``num_res`` value to a text file.

	The file starts with one "#" header line, followed by one line per row in
	the form "<pose>\\t<num_res>".

	path_file_result_file_only_pose: path of the file to create or overwrite.
	df_result: object whose collect() returns rows exposing the attributes
		``pose`` and ``num_res``.
	"""
	# The header text is kept exactly as it was: it is part of the output file.
	header = "# poses and number of hydrogen bond hat were filtered by residues from hydrogen bond\n"
	# The context manager closes the file even if collect() raises.
	with open(path_file_result_file_only_pose, "w") as f_file:
		f_file.write(header)
		for row in df_result.collect():
			f_file.write(str(row.pose) + "\t" + str(row.num_res) + "\n")

def save_result(path_file_result_file, df_result):
	"""Write the hydrogen bond rows of df_result to a tab-separated text file.

	The file starts with one "#" header line naming the seven columns,
	followed by one line per row with the values in the same order.

	path_file_result_file: path of the file to create or overwrite.
	df_result: object whose collect() returns rows exposing the attributes
		``ligand_atom``, ``accept_or_donate``, ``receptor_residue``,
		``receptor_atom``, ``distance``, ``angle`` and ``pose``.
	"""
	header = "#ligand_atom\taccept_or_donate\treceptor_residue\treceptor_atom\tdistance[A]\tangle[deg]\tpose\n"
	# The context manager closes the file even if collect() raises.
	with open(path_file_result_file, "w") as f_file:
		f_file.write(header)
		for row in df_result.collect():
			# Column order must match the header above.
			values = (
				row.ligand_atom,
				row.accept_or_donate,
				row.receptor_residue,
				row.receptor_atom,
				row.distance,
				row.angle,
				row.pose,
			)
			f_file.write("\t".join(str(value) for value in values) + "\n")

def save_log(finish_time, start_time):
	"""Write start time, finish time and elapsed seconds to the log file.

	The log is written to 'hydrogen_bond_residue_selection.log' in the current
	working directory, overwriting any existing file.

	finish_time: datetime at which the run finished.
	start_time: datetime at which the run started.
	"""
	log_file_name = 'hydrogen_bond_residue_selection.log'
	path_file = os.path.join(os.getcwd(), log_file_name)

	diff_time = finish_time - start_time
	# The original never closed the log file; the context manager guarantees
	# the content is flushed and the handle released.
	with open(path_file, 'w') as log_file:
		log_file.write('Starting ' + str(start_time) + '\n')
		log_file.write('Finishing ' + str(finish_time) + '\n')
		log_file.write('Time Execution (seconds): ' + str(diff_time.total_seconds()) + '\n')


def main():
	"""Select docking poses by hydrogen bonds and write receptor-ligand complex PDB files.

	Reads settings from 'config.ini' in the current directory (sections
	DRUGDESIGN, DEFAULT and SPARK), creates the output directories under
	<path_analysis>/selected_complexo, starts a SparkContext and then:

	* when the configured residue-list file exists, writes the result files
	  derived from that list and keeps the first
	  number_poses_to_select_hydrogen_bond entries of each one;
	* always loads the two summary_normalized_hbonds_* files found in
	  path_analysis and keeps the same number of entries from each;
	* for every kept entry, writes a complex PDB file made of the receptor
	  PDB lines followed by the ligand PDB lines;
	* writes the execution-time log through save_log.
	"""

	config = configparser.ConfigParser()
	config.read('config.ini')

	#Number of poses to select by hydrogen bond
	number_poses_to_select_hydrogen_bond = int(config.get('DRUGDESIGN', 'number_poses_to_select_hydrogen_bond') )
	#File with the list of residues used to select hydrogen bonds
	file_select_hydrogen_bond = config.get('DRUGDESIGN', 'file_residue_to_select_hydrogen_bond')
	#Path that contains all files for analysis
	path_analysis = config.get('DEFAULT', 'path_analysis')	
	#Path where all pdb receptor are
	path_receptor = config.get('DEFAULT', 'pdb_path')
	#Ligand Database file
	ligand_database  = config.get('DEFAULT', 'ligand_database_path_file')	
	#Path for saving pdb files of models generated by VS
	path_ligand = get_directory_pdb_analysis(path_analysis)		
	#File for saving the hydrogen bonds filtered by the residue list
	result_file_to_select_hydrogen_bond = config.get('DRUGDESIGN', 'result_file_to_select_hydrogen_bond')
	#Files for saving only the poses (plain, normalized, and normalized by heavy atoms)
	result_file_to_select_hydrogen_bond_only_pose = config.get('DRUGDESIGN', 'result_file_to_select_hydrogen_bond_only_pose')
	result_file_to_select_normalized_hydrogen_bond_only_pose = config.get('DRUGDESIGN', 'result_file_to_select_normalized_hydrogen_bond_only_pose')	
	result_file_to_select_normalized_heavy_atom_hydrogen_bond_only_pose = config.get('DRUGDESIGN', 'result_file_to_select_normalized_heavy_atom_hydrogen_bond_only_pose')		
	#Path where the selected complexes are saved
	path_to_save = os.path.join("selected_complexo", "hydrogen_bond")
	path_to_save = os.path.join(path_analysis, path_to_save)
	if not os.path.exists(path_to_save):
		os.makedirs(path_to_save)
	#Path where the complexes selected by normalized (donors and acceptors) hydrogen bond are saved
	path_to_save_normalized_da = os.path.join("selected_complexo", "normalized_hydrogen_bond_donors_acceptors")
	path_to_save_normalized_da = os.path.join(path_analysis, path_to_save_normalized_da)
	if not os.path.exists(path_to_save_normalized_da):
		os.makedirs(path_to_save_normalized_da)
	#Path where the complexes selected by heavy-atom-normalized hydrogen bond are saved
	path_to_save_normalized_heavyAtom = os.path.join("selected_complexo", "normalized_hydrogen_bond_heavyAtom")
	path_to_save_normalized_heavyAtom = os.path.join(path_analysis, path_to_save_normalized_heavyAtom)
	if not os.path.exists(path_to_save_normalized_heavyAtom):
		os.makedirs(path_to_save_normalized_heavyAtom)
	#Path where the complexes selected by residue list and normalized by donors and acceptors are saved
	path_to_save_normalized_residue = os.path.join("selected_complexo", "normalized_hydrogen_bond_residue_donors_acceptors")
	path_to_save_normalized_residue = os.path.join(path_analysis, path_to_save_normalized_residue)
	if not os.path.exists(path_to_save_normalized_residue):
		os.makedirs(path_to_save_normalized_residue)

	#Path where the complexes selected by residue list and normalized by heavy atoms are saved
	path_to_save_normalized_residue_heavyAtoms = os.path.join("selected_complexo", "normalized_hydrogen_bond_residue_heavyAtoms")
	path_to_save_normalized_residue_heavyAtoms = os.path.join(path_analysis, path_to_save_normalized_residue_heavyAtoms)
	if not os.path.exists(path_to_save_normalized_residue_heavyAtoms):
		os.makedirs(path_to_save_normalized_residue_heavyAtoms)

	# Create SPARK config
	maxResultSize = str(config.get('SPARK', 'maxResultSize'))
	conf = (SparkConf().set("spark.driver.maxResultSize", maxResultSize))

	# Create context
	sc = SparkContext(conf=conf)
	sqlCtx = SQLContext(sc)

	# Only the Spark processing is timed; config reading and directory creation happen before this point
	start_time = datetime.now()

	#Broadcast the paths read by build_complex_from_pose_file_name
	path_to_save_b = sc.broadcast(path_to_save) 
	path_receptor_b = sc.broadcast(path_receptor) 
	path_ligand_b = sc.broadcast(path_ligand) 

	#Adding Python Source file
	#Path for drugdesign project
	path_spark_drugdesign = config.get('DRUGDESIGN', 'path_spark_drugdesign')	
	sc.addPyFile(os.path.join(path_spark_drugdesign,"vina_utils.py"))
	sc.addPyFile(os.path.join(path_spark_drugdesign,"database_io.py"))
	sc.addPyFile(os.path.join(path_spark_drugdesign,"hydrogen_bond_io.py"))
	sc.addPyFile(os.path.join(path_spark_drugdesign,"hydrogen_bond_crud.py"))
	sc.addPyFile(os.path.join(path_spark_drugdesign,"json_utils.py"))

	#load all-residue_hbonds_4.0A_30.0deg.dat file
	path_file_hydrogen_bond = os.path.join(path_analysis, "all-residue_hbonds_4.0A_30.0deg.dat")
	all_residue_split = load_file_all_residue_hbonds(sc, path_file_hydrogen_bond)

	#Creating all_residue Dataframe
	# NOTE(review): df_all_residue is not used again in this function; presumably
	# create_df_all_residue registers a temp table that the sqlCtx-only helpers
	# below query - confirm in hydrogen_bond_crud before removing.
	df_all_residue = create_df_all_residue(sqlCtx, all_residue_split)

	# The residue-list processing is optional: it only runs when the configured file exists
	if os.path.isfile(file_select_hydrogen_bond):
		#Creating residue list as Dataframe
		# NOTE(review): df_residue_list is likewise unused afterwards; presumably
		# kept for a temp-table side effect - confirm in hydrogen_bond_crud.
		residue_listRDD = load_file_select_hydrogen_bond(sc, file_select_hydrogen_bond)
		df_residue_list = create_df_residue_list(sqlCtx, residue_listRDD)		

		df_result = create_df_all_residue_filtered_by_res_list(sqlCtx)
		#Saving result
		path_file_result_file = os.path.join(path_analysis, result_file_to_select_hydrogen_bond)
		save_result(path_file_result_file, df_result)	

		#Grouping by poses
		df_result = get_group_by_poses_all_residue_filtered_by_res_list(sqlCtx)

		#Saving result only pose
		path_file_result_file_only_pose = os.path.join(path_analysis, result_file_to_select_hydrogen_bond_only_pose)
		save_result_only_pose(path_file_result_file_only_pose, df_result)	

		#Loading all poses group by poses (re-read from the file that was just written)
		only_poseRDD = load_only_poses_file_hydrogen_bond(sc, path_file_result_file_only_pose)
		# The result of take() is later passed to sc.parallelize, despite the RDD suffix in the name
		only_pose_takeRDD = only_poseRDD.take(number_poses_to_select_hydrogen_bond)

		#Calculating normalized hydrogen bond

		#Loading database
		rdd_database = load_database(sc, ligand_database)
		#Creating Dataframe and registering it as temp table "database"
		database_table = sqlCtx.createDataFrame(rdd_database)	
		database_table.registerTempTable("database")

		#Creating Dataframe normalized_by_donors_acceptors		
		df_result = create_df_normalized_by_donors_acceptors(sqlCtx, df_result)
		#Saving result only pose by normalized hydrogen bond
		path_file_result_file_only_pose = os.path.join(path_analysis, result_file_to_select_normalized_hydrogen_bond_only_pose)
		save_result_only_pose_normalized_by_residue_list(path_file_result_file_only_pose, df_result)	

		#Loading poses - normalized_residues_filtered_by_list
		only_pose_normalizedRDD = load_only_poses_file_hydrogen_bond_normalized_by_residues(sc, path_file_result_file_only_pose)
		only_pose_normalizedRDD = only_pose_normalizedRDD.take(number_poses_to_select_hydrogen_bond)
		
		# Normalized Hydrogen Bond by heavy atoms
		df_result = create_df_normalized_by_heavy_atoms(sqlCtx)			

		#Saving result only pose by hydrogen bond normalized by heavy atoms
		path_file_result_file_only_pose = os.path.join(path_analysis, result_file_to_select_normalized_heavy_atom_hydrogen_bond_only_pose)
		save_result_only_pose_normalized_by_residue_list_heavy_atoms(path_file_result_file_only_pose, df_result)	

		#Loading poses - normalized by heavy atoms, filtered by list (same loader as the previous file)
		only_pose_normalized_heavyAtomsRDD = load_only_poses_file_hydrogen_bond_normalized_by_residues(sc, path_file_result_file_only_pose)
		only_pose_normalized_heavyAtomsRDD = only_pose_normalized_heavyAtomsRDD.take(number_poses_to_select_hydrogen_bond)

#************** END OF RESIDUE LIST

	#Loading normalized poses by donors and acceptors
	path_file_normalized_pose = os.path.join(path_analysis, "summary_normalized_hbonds_donors_acceptors_4.0A_30.0deg.dat")
	normalized_poseRDD = load_file_summary_normalized_hbonds(sc, path_file_normalized_pose)

	normalized_poseRDD = normalized_poseRDD.take(number_poses_to_select_hydrogen_bond)

	#Loading normalized poses by heavy atoms
	path_file_normalized_pose = os.path.join(path_analysis, "summary_normalized_hbonds_heavyAtom_4.0A_30.0deg.dat")
	normalized_pose_heavyAtomsRDD = load_file_summary_normalized_hbonds(sc, path_file_normalized_pose)

	normalized_pose_heavyAtomsRDD = normalized_pose_heavyAtomsRDD.take(number_poses_to_select_hydrogen_bond)

# ******************** STARTED FUNCTION ********************************
	def build_complex_from_pose_file_name(p_name):
		"""Write one receptor-ligand complex PDB file for the given row.

		p_name must expose the attributes ``pose`` (used to locate the receptor
		and ligand PDB files) and ``f_name`` (base name of the output file).
		The output is the receptor file without the lines containing "END",
		followed by the ligand file without the lines containing "REMARK".
		Directories come from the broadcast variables of the enclosing scope.
		"""
		# Imported here rather than relying on the module-level import; presumably so the
		# names resolve on the workers after sc.addPyFile - confirm. Only
		# get_receptor_from_receptor_ligand_model is used below.
		from vina_utils import get_receptor_from_receptor_ligand_model, get_ligand_from_receptor_ligand_model, get_model_from_receptor_ligand_model, get_separator_filename_mode
		#Broadcast
		path_to_save = path_to_save_b.value
		path_receptor = path_receptor_b.value
		path_ligand = path_ligand_b.value
		#Based on row value from dataframe
		pose_file_name =  p_name.pose

		#Receptor
		receptor_file_name = get_receptor_from_receptor_ligand_model(pose_file_name)				
		receptor_file = os.path.join(path_receptor, receptor_file_name+".pdb")
		f_receptor_file = open(receptor_file,"r")
		#ligand file name
		ligand_file_name = os.path.join(path_ligand, pose_file_name+".pdb")
		f_ligand_file_name = open(ligand_file_name,"r")

		#Open file for writing the complex
		full_path_for_save_complex = os.path.join(path_to_save, p_name.f_name+".pdb")
		f_compl = open(full_path_for_save_complex, "w")
		#Insert lines of receptor, skipping every line that contains "END"
		for item in  f_receptor_file:
			if str(item).find("END") == -1:
				f_compl.write(item)
		#Insert lines of model, skipping every line that contains "REMARK"
		for item in f_ligand_file_name:		
			if str(item).find("REMARK") == -1:
				f_compl.write(item)
		#Closing files
		# NOTE(review): the three files are not closed if an exception is raised above.
		f_compl.close()
		f_ligand_file_name.close()
		f_receptor_file.close()
# ******************** FINISHED FUNCTION ********************************
	
	# NOTE(review): path_to_save_b is rebound before each foreach below; presumably the
	# closure is serialized per job and therefore sees the new broadcast - confirm.
	if os.path.isfile(file_select_hydrogen_bond):
		#Selecting poses by residues filtered
		sc.parallelize(only_pose_takeRDD).foreach(build_complex_from_pose_file_name)
		#Updated path to save complex
		path_to_save_b = sc.broadcast(path_to_save_normalized_residue) 
		sc.parallelize(only_pose_normalizedRDD).foreach(build_complex_from_pose_file_name)				
		#Updated path to save complex
		path_to_save_b = sc.broadcast(path_to_save_normalized_residue_heavyAtoms) #Updated path to save complex	
		sc.parallelize(only_pose_normalized_heavyAtomsRDD).foreach(build_complex_from_pose_file_name)				


	#Selecting poses by normalized donors and acceptors
	#Broadcast
	path_to_save_b = sc.broadcast(path_to_save_normalized_da) #Updated path to save complex
	sc.parallelize(normalized_poseRDD).foreach(build_complex_from_pose_file_name)

	#Selecting poses by normalized heavy atoms
	#Broadcast
	path_to_save_b = sc.broadcast(path_to_save_normalized_heavyAtom) #Updated path to save complex
	sc.parallelize(normalized_pose_heavyAtomsRDD).foreach(build_complex_from_pose_file_name)

	finish_time = datetime.now()

	save_log(finish_time, start_time)

# Script entry point: the call is unguarded, so the whole pipeline also runs if this file is imported.
main()